sdata <- read.table("Sdata.csv", sep=" ", header=TRUE)
pairs(sdata)

plot(Y ~ X13, data=sdata)
lm1 <- lm(Y ~ X13 + I(X13^2), data=sdata)
b <- coef(lm1)
curve(b[1] + b[2]*x + b[3]*x^2, add=TRUE)

summary(lm1)
## 
## Call:
## lm(formula = Y ~ X13 + I(X13^2), data = sdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.040 -15.731  -3.089   6.602  69.116 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 33.79442    4.76248   7.096 2.11e-08 ***
## X13         -1.28604    0.55486  -2.318   0.0261 *  
## I(X13^2)    17.05763    0.03903 437.021  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.94 on 37 degrees of freedom
## Multiple R-squared:  0.9999, Adjusted R-squared:  0.9999 
## F-statistic: 1.727e+05 on 2 and 37 DF,  p-value: < 2.2e-16
pairs(cbind(R=lm1$res, sdata))

lm2 <- lm(Y ~ X13 + I(X13^2) + X2, data=sdata)
par(mfrow=c(1,2))
plot(lm2, which=1:2)

pairs(cbind(R=lm2$res, sdata), col=as.factor(sdata$X10))

lm3 <- lm(Y ~ X13 + I(X13^2) + X2 + I(X10==2) + I(X10==3) + I(X10==4), data=sdata)
summary(lm3)
## 
## Call:
## lm(formula = Y ~ X13 + I(X13^2) + X2 + I(X10 == 2) + I(X10 == 
##     3) + I(X10 == 4), data = sdata)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.324 -2.436 -0.216  2.007 17.467 
## 
## Coefficients:
##                  Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)     -6.060392   2.455816   -2.468  0.01895 *  
## X13             -0.299121   0.132282   -2.261  0.03046 *  
## I(X13^2)        17.011411   0.009588 1774.169  < 2e-16 ***
## X2               5.673183   0.227535   24.933  < 2e-16 ***
## I(X10 == 2)TRUE  9.334707   2.713169    3.441  0.00159 ** 
## I(X10 == 3)TRUE -5.802909   2.489933   -2.331  0.02604 *  
## I(X10 == 4)TRUE -0.030425   2.515469   -0.012  0.99042    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.205 on 33 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.118e+06 on 6 and 33 DF,  p-value: < 2.2e-16
par(mfrow=c(1,2))
plot(lm3, which=1:2)

lm4 <- lm(Y ~ X13 + I(X13^2) + X2 + I(X10==2) + I(X10==3), data=sdata)
summary(lm4)
## 
## Call:
## lm(formula = Y ~ X13 + I(X13^2) + X2 + I(X10 == 2) + I(X10 == 
##     3), data = sdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.3237 -2.4370 -0.2274  2.0080 17.4634 
## 
## Coefficients:
##                  Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)     -6.076582   2.028434   -2.996 0.005082 ** 
## X13             -0.299190   0.130201   -2.298 0.027845 *  
## I(X13^2)        17.011443   0.009064 1876.803  < 2e-16 ***
## X2               5.672467   0.216446   26.207  < 2e-16 ***
## I(X10 == 2)TRUE  9.353792   2.174441    4.302 0.000135 ***
## I(X10 == 3)TRUE -5.784409   1.935662   -2.988 0.005179 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.128 on 34 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.382e+06 on 5 and 34 DF,  p-value: < 2.2e-16
par(mfrow=c(1,2))
plot(lm4, which=1:2)

lm5 <- lm(Y ~ X13 + I(X13^2) + X2 + I(X10==2) + I(X10==3) + I(X10==2):X2, data=sdata)
summary(lm5)
## 
## Call:
## lm(formula = Y ~ X13 + I(X13^2) + X2 + I(X10 == 2) + I(X10 == 
##     3) + I(X10 == 2):X2, data = sdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.1183 -1.7709 -0.1383  1.6594 18.8668 
## 
## Coefficients:
##                     Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)        -5.383238   2.038060   -2.641   0.0125 *  
## X13                -0.234769   0.134240   -1.749   0.0896 .  
## I(X13^2)           17.007982   0.009162 1856.452   <2e-16 ***
## X2                  5.581265   0.220190   25.348   <2e-16 ***
## I(X10 == 2)TRUE     2.498485   4.918242    0.508   0.6148    
## I(X10 == 3)TRUE    -5.892473   1.898500   -3.104   0.0039 ** 
## X2:I(X10 == 2)TRUE  1.030564   0.666338    1.547   0.1315    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.026 on 33 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.199e+06 on 6 and 33 DF,  p-value: < 2.2e-16
par(mfrow=c(1,2))
plot(lm5, which=1:2)

\[ Y_i = \beta_0 + \beta_1 X_{13} + \beta_2 X_{13}^2 + \beta_3 X_2 + \beta_4 X_{10==2} + \beta_5 X_{10==3} + \epsilon_i \] Our estimates for the \(\beta\)’s in the above model are as follows:

confint(lm4, level=.99)
##                       0.5 %      99.5 %
## (Intercept)     -11.6109499 -0.54221480
## X13              -0.6544286  0.05604804
## I(X13^2)         16.9867131 17.03617369
## X2                5.0819184  6.26301577
## I(X10 == 2)TRUE   3.4210579 15.28652562
## I(X10 == 3)TRUE -11.0656571 -0.50316057
Term Estimate Lower Bound Upper Bound
\(\beta_0\) -6.076582
\(\beta_1\) -0.299190
pairs(cbind(R=lm4$res, sdata), col=as.factor(sdata$X11))

pairs(cbind(R=lm4$res, sdata), col=as.factor(sdata$X12))

pairs(cbind(R=lm4$res, sdata), col=as.factor(sdata$X10))